package us.codecraft.webmagic.lianjia.processor;

import com.alibaba.fastjson.JSONObject;
import jdk.nashorn.internal.parser.JSONParser;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

public class GetPageUrlProcessor implements PageProcessor {


    private Site site;

    private String domain;

    private String ulsSelectable;

    private String nameSelectable;
    private String urlSelectable;

    public GetPageUrlProcessor(String domainUrl, String ulsSelectable, String nameSelectable, String urlSelectable) {
        this.domain = domainUrl;
        this.ulsSelectable = ulsSelectable;
        this.nameSelectable = nameSelectable;
        this.urlSelectable = urlSelectable;
        this.site = Site
                .me()
                .setDomain(domainUrl)
                .setSleepTime(3000)
                .setUserAgent(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
    }

    public void process(Page page) {
        Selectable endPage = page.getHtml().$(ulsSelectable);
        String url = domain + endPage.xpath(urlSelectable).toString();
        String name = endPage.xpath(nameSelectable).toString();
        JSONObject jsonObj = JSONObject.parseObject(name);
        int totalPage = jsonObj.getInteger("totalPage");
        System.out.println(totalPage + "[" + url + "]");
    }

    public Site getSite() {
        return site;
    }

    public static void main(String[] args) {
        String domain = "https://sh.lianjia.com";
        String url = "https://sh.lianjia.com/ershoufang/biyun/";
        String ulsSelectable = "div.leftContent > div.contentBottom.clear > div.page-box.fr > div";
        String nameSelectable = "//div/@page-data";
        String urlSelectable = "//div/@page-url";

        Spider.create(new GetPageUrlProcessor(domain, ulsSelectable, nameSelectable, urlSelectable)).addUrl(url)
                .addPipeline(new ConsolePipeline()).run();
    }
}
